AudioSpectrogram

音频频谱(Audio Spectrogram)是一种常用的音频信号特征表示方式,通过将时域音频信号转换为频域信号,提供了时间和频率的双重信息。

参数说明:
  • params - 频谱图参数配置结构体指针

  • workspace - 工作空间缓冲区结构体指针

  • core_mask - 核掩码,指定参与计算的处理器核(仅共享存储版本)

结构体定义:
 1    typedef struct {
 2        // Input
 3        float* input;           // Input data address
 4        int input_len;          // Input data length
 5
 6        // Output
 7        float* output;          // Output data address
 8        int* output_shape;      // Output data shape
 9
10        // Configuration parameters
11        int pad;                // Padding size
12        WindowType window_type; // Window function type
13        int n_fft;              // Number of FFT points
14        int hop_length;         // Hop (frame shift) length
15        int win_length;         // Window length
16        float power;            // Power value
17        bool normalized;        // Whether to normalize
18        bool center;            // Whether to center
19        BorderType pad_mode;    // Padding mode
20        bool onesided;          // Whether the spectrum is one-sided
21    } SpectrogramParam;
22    typedef struct {
23        float* fft_window;          // FFT window function buffer
24        float* fft_window_later;    // Subsequent FFT window buffer
25        float* input_data_pad;      // Input data after padding
26        float* input_data;          // Input data buffer
27        float* input_win;           // Windowed input data
28        float* exp_complex;         // Complex exponential buffer
29        float* spec_f;              // Spectrum frequency buffer
30        float* output_onsided;      // One-sided output buffer; NOTE(review): spelled "onsided" here — confirm against the library header
31    } WorkspaceParam;
支持平台:

FT78NE MT7004

备注:

  • FT78NE 支持 fp32

  • MT7004 支持 fp32

共享存储版本:

void fp_audio_spectrogram_s(SpectrogramParam *params, WorkspaceParam *workspace, int core_mask)

C调用示例:

 1// FT78NE example: shared-storage version
 2// NOTE(review): the header that declares SpectrogramParam, WorkspaceParam and
 3// fp_audio_spectrogram_s is not shown in this example; include it as well.
 4#include <stdio.h>
 5
 6int main(int argc, char* argv[]) {
 7    // Fixed addresses used by this example for the data and parameter blocks
 8    float* input_data = (float*)0x81000000;
 9    float* output = (float*)0x82000000;
10    // One initializer per element of the two-element shape array.
11    // NOTE(review): confirm how many shape entries the library writes.
12    int output_shape[2] = {0, 0};
13    SpectrogramParam* spec_params = (SpectrogramParam*)0x83200000;
14    WorkspaceParam* spec_workspace = (WorkspaceParam*)0x83400000;
15
16    // Fill in the SpectrogramParam structure
17    spec_params->input = input_data;
18    spec_params->input_len = 4000;
19    spec_params->output = output;
20    spec_params->output_shape = output_shape;
21    spec_params->pad = 0;
22    spec_params->window_type = kHann;
23    spec_params->n_fft = 32;
24    spec_params->hop_length = 16;
25    spec_params->win_length = 32;
26    spec_params->power = 2.0f;
27    spec_params->normalized = false;
28    spec_params->center = false;
29    spec_params->pad_mode = kConstant;
30    spec_params->onesided = true;
31
32    // Core mask: selects the processor cores that take part in the computation
33    int core_mask = 0xff;
34    // Assign an address to every intermediate buffer pointer in spec_workspace
35    // ...
36    fp_audio_spectrogram_s(spec_params, spec_workspace, core_mask);
37    return 0;
38}

私有存储版本:

void fp_audio_spectrogram_p(SpectrogramParam *params, WorkspaceParam *workspace)

C调用示例:

 1// FT78NE example: private-storage version
 2// NOTE(review): the header that declares SpectrogramParam, WorkspaceParam and
 3// fp_audio_spectrogram_p is not shown in this example; include it as well.
 4#include <stdio.h>
 5
 6int main(int argc, char* argv[]) {
 7    // Fixed addresses used by this example for the data and parameter blocks
 8    float* input_data = (float*)0x10810000;
 9    // Named `output` so that the assignment to spec_params->output below resolves
10    float* output = (float*)0x10820000;
11    SpectrogramParam* spec_params = (SpectrogramParam*)0x10830000;
12    WorkspaceParam* spec_workspace = (WorkspaceParam*)0x10840000;
13    // One initializer per element of the two-element shape array.
14    // NOTE(review): confirm how many shape entries the library writes.
15    int output_shape[2] = {0, 0};
16
17    // Fill in the SpectrogramParam structure
18    spec_params->input = input_data;
19    spec_params->input_len = 4000;
20    spec_params->output = output;
21    spec_params->output_shape = output_shape;
22    spec_params->pad = 0;
23    spec_params->window_type = kHann;
24    spec_params->n_fft = 32;
25    spec_params->hop_length = 16;
26    spec_params->win_length = 32;
27    spec_params->power = 2.0f;
28    spec_params->normalized = false;
29    spec_params->center = false;
30    spec_params->pad_mode = kConstant;
31    spec_params->onesided = true;
32
33    // Assign an address to every intermediate buffer pointer in spec_workspace
34    // ...
35
36    fp_audio_spectrogram_p(spec_params, spec_workspace);
37    return 0;
38}